/*
 * Copyright (c) 2016, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package com.oracle.truffle.regex.tregex.nfa;

import com.oracle.truffle.regex.UnsupportedRegexException;
import com.oracle.truffle.regex.result.PreCalculatedResultFactory;
import com.oracle.truffle.regex.tregex.TRegexOptions;
import com.oracle.truffle.regex.tregex.parser.Counter;
import org.graalvm.collections.EconomicMap;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Used for pre-calculating and finding the result of tree-like regular expressions. A regular
 * expression is tree-like if it does not contain any loops (* or +).
 */
public final class NFATraceFinderGenerator {

    /**
     * NFA we shall compute a reverse tree-shaped NFA from.
     */
    private final NFA originalNFA;

    /**
     * States of the new NFA generated by this class.
     */
    private final List<NFAState> states;

    /**
     * All states in the new NFA are copies of states of the original NFA. This map maps all
     * original states to their respective copies, in the order the copies were created, so that
     * {@code duplicatedStatesMap[originalState.getId()]} will result in a list of copies of the
     * original state, where the first entry is the first copy that was created.
     */
    private final List<NFAState>[] duplicatedStatesMap;

    /**
     * Every path through the original NFA will correspond to a path from a leaf node of the new
     * tree-shaped NFA to its root. The result of one such path corresponds to one entry in this
     * list, and the order of this list corresponds to the result's priorities -
     * {@code resultList.get(0)} has higher priority than {@code resultList.get(1)} and so on.
     */
    private final List<PreCalculatedResultFactory> resultList = new ArrayList<>();

    /**
     * Equal results are consolidated using this map.
     */
    private final EconomicMap<PreCalculatedResultFactory, PreCalculatedResultFactory> resultDeDuplicationMap = EconomicMap.create();

    private final Counter.ThresholdCounter stateID = new Counter.ThresholdCounter(TRegexOptions.TRegexMaxNFASize, "TraceFinder NFA explosion");
    private final Counter.ThresholdCounter transitionID = new Counter.ThresholdCounter(TRegexOptions.TRegexMaxNFASize, "TraceFinder NFA transition explosion");

    @SuppressWarnings("unchecked")
    private NFATraceFinderGenerator(NFA originalNFA) {
        this.originalNFA = originalNFA;
        this.states = new ArrayList<>(originalNFA.getStates().length * 2);
        this.duplicatedStatesMap = new ArrayList[originalNFA.getStates().length];
    }

    /**
     * Generates a NFA that can be used to generate a backward-searching DFA that can find the
     * result (capture group offsets) of a regex match found by a forward-searching DFA.
     * <p>
     * The idea behind this is the following: If a regular expression does not contain any loops (+
     * and *), its NFA will be a directed acyclic graph, which can be converted to a tree. If the
     * search pattern is a tree, we can find all possible results (capture group offsets) ahead of
     * time, by a simple tree traversal. Knowing all possible results in advance saves us the work
     * of updating individual result indices during the search. The only problem is that we
     * generally have to <em>find</em> the pattern in a string - the result will not necessarily
     * start at the index we start searching from.
     * <p>
     * Doing an un-anchored search with a DFA requires that we add a loop in the beginning. For
     * example, in order to find {@code /a(b|cd)/} in {@code "____acd____"}, we have to prepend the
     * expression with {@code [\u0000-\uffff]*}, and the resulting NFA can no longer be seen as a
     * tree. In order to still gain some performance from the ability to pre-calculate all results,
     * we can do the following:
     * <ul>
     * <li>We create a "reverse" tree from the NFA, so the root of the tree is the final state of
     * the original NFA. We annotate every node in the tree-shaped NFA with all pre-calculated
     * results that are reachable from the respective node. All leaves of the tree have exactly one
     * possible pre-calculated result.</li>
     * <li>We find the regex match using a regular forward searching DFA, with no regard to capture
     * groups.</li>
     * <li>The result of the forward search gives us the anchor necessary to do a search based on
     * the tree-shaped NFA, and it also gives us the information that a match has definitely been
     * found.</li>
     * <li>We can now use this information to search for the pre-calculated result that corresponds
     * to the string we found. To do so, we create a DFA from the tree-shaped NFA, where all states
     * whose NFA state set contains only one possible pre-calculated result are final states without
     * further successors.</li>
     * <li>To find the correct pre-calculated result, we simply run the new DFA in reverse
     * direction, starting from the index we found with the forward searching DFA.</li>
     * </ul>
     * 
     * <pre>
     *     {@code
     *     Example:
     *     regular expression: /a(b|c)d(e|fg)/
     *
     *     this expression has two possible results:
     *     (in the form: [start CG 0, end CG 0, start CG 1, end CG 1,... ])
     *     0: [0, 4, 1, 2, 3, 4]
     *     1: [0, 5, 1, 2, 3, 5]
     *
     *     NFA in reverse tree form:
     *          I
     *         /  \
     *         e  g
     *         |  |
     *         d  f
     *       / |  |
     *      b  c  d
     *      |  |  | \
     *      a  a  b  c
     *            |  |
     *            a  a
     *
     *     When searching for the correct result, we can immediately determine it based on the last character:
     *     e -> result 0
     *     g -> result 1
     *     }
     * </pre>
     * 
     * We also have to take care of the order in which results are to be found. For example, in the
     * expression {@code /a(b)c|ab(c)/}, we always have to return the result created from taking the
     * first branch, never the second. Therefore, we create the reverse tree-shaped NFA while
     * traversing the original NFA in priority order.
     *
     * @param nfa the NFA used for the forward-searching DFA.
     * @return a new NFA suitable to find the result while searching backward.
     */
    public static NFA generateTraceFinder(NFA nfa) {
        return new NFATraceFinderGenerator(nfa).run();
    }

    private static final class PathElement {

        private final NFAStateTransition transition;
        private int i = 0;

        private PathElement(NFAStateTransition transition) {
            this.transition = transition;
        }

        public NFAStateTransition getTransition() {
            return transition;
        }

        public boolean hasNextTransition() {
            return i < transition.getTarget().getNext().size();
        }

        public NFAStateTransition getNextTransition() {
            return transition.getTarget().getNext().get(i++);
        }
    }

    private NFA run() {
        NFAFinalState newUnAnchoredFinalState = copy(originalNFA.getReverseUnAnchoredEntry(), -1);
        NFAAnchoredFinalState newAnchoredFinalState = copy(originalNFA.getReverseAnchoredEntry(), -1);
        ArrayList<PathElement> graphPath = new ArrayList<>();
        for (NFAAbstractFinalState initialState : new NFAAbstractFinalState[]{originalNFA.getAnchoredEntry().get(0), originalNFA.getUnAnchoredEntry().get(0)}) {
            for (NFAStateTransition t : initialState.getNext()) {
                // All paths start from the original initial states, which will be duplicated and
                // become leaf nodes in the tree.
                PathElement curElement = new PathElement(t);
                while (true) {
                    // The graph-path contains nodes that have not been converted to tree form
                    // yet, and must be treated differently than the rest of the path.
                    while (duplicatedStatesMap[curElement.getTransition().getTarget().getId()] == null) {
                        graphPath.add(curElement);
                        curElement = new PathElement(curElement.getNextTransition());
                    }
                    /*
                     * We hit a node that has been converted to tree form already, so from here all
                     * nodes will have exactly one parent node (getNext().size() == 1). To create a
                     * proper tree, we have to duplicate the graphPath for all duplicates of the
                     * node we hit. Initially, we will hit one of newUnAnchoredFinalState and
                     * newAnchoredFinalState here.
                     */
                    for (NFAState duplicate : duplicatedStatesMap[curElement.getTransition().getTarget().getId()]) {
                        int resultID = resultList.size();
                        if (resultID == TRegexOptions.TRegexTraceFinderMaxNumberOfResults) {
                            throw new UnsupportedRegexException("TraceFinder: too many possible results");
                        }
                        NFAState lastCopied = copy(initialState, resultID);
                        PreCalculatedResultFactory result = resultFactory();
                        // create a copy of the graph path
                        int i = 0;
                        for (; i < graphPath.size(); i++) {
                            final NFAStateTransition pathTransition = graphPath.get(i).getTransition();
                            NFAMatcherState copy = copy((NFAMatcherState) pathTransition.getTarget(), resultID);
                            createTransition(lastCopied, copy, pathTransition, result, i);
                            lastCopied = copy;
                        }
                        // link the copied path to the existing tree
                        createTransition(lastCopied, duplicate, curElement.getTransition(), result, i);
                        // traverse the existing tree to the root to complete the pre-calculated
                        // result.
                        NFAState treeNode = duplicate;
                        while (treeNode instanceof NFAMatcherState) {
                            i++;
                            assert treeNode.getNext().size() == 1;
                            treeNode.addPossibleResult(resultID);
                            treeNode.getNext().get(0).getGroupBoundaries().applyToResultFactory(result, i);
                            treeNode = treeNode.getNext().get(0).getTarget();
                        }
                        assert treeNode instanceof NFAAbstractFinalState;
                        treeNode.addPossibleResult(resultID);
                        result.setLength(i);
                        PreCalculatedResultFactory existingResult = resultDeDuplicationMap.get(result);
                        if (existingResult == null) {
                            resultDeDuplicationMap.put(result, result);
                        } else {
                            result = existingResult;
                        }
                        resultList.add(result);
                        assert resultList.get(resultID) == result;
                    }
                    // We processed one full path. Now, we have to explore all branches in the
                    // graph-path, depth-first.
                    while (!graphPath.isEmpty() && !graphPath.get(graphPath.size() - 1).hasNextTransition()) {
                        graphPath.remove(graphPath.size() - 1);
                    }
                    if (graphPath.isEmpty()) {
                        break;
                    } else {
                        curElement = new PathElement(graphPath.get(graphPath.size() - 1).getNextTransition());
                    }
                }
            }
        }
        return new NFA(originalNFA.getAst(), null, null, newAnchoredFinalState, newUnAnchoredFinalState, states, stateID, transitionID, resultList.toArray(new PreCalculatedResultFactory[0]));
    }

    private void createTransition(NFAState source, NFAState target, NFAStateTransition originalTransition,
                    PreCalculatedResultFactory preCalcResult, int preCalcResultIndex) {
        originalTransition.getGroupBoundaries().applyToResultFactory(preCalcResult, preCalcResultIndex);
        source.setNext(new ArrayList<>(Collections.singletonList(new NFAStateTransition((short) transitionID.inc(), source, target, originalTransition.getGroupBoundaries()))), true);
    }

    private PreCalculatedResultFactory resultFactory() {
        return new PreCalculatedResultFactory(originalNFA.getAst().getNumberOfCaptureGroups());
    }

    private NFAAbstractFinalState copy(NFAAbstractFinalState state, int resultID) {
        return state instanceof NFAFinalState ? copy((NFAFinalState) state, resultID) : copy((NFAAnchoredFinalState) state, resultID);
    }

    private NFAFinalState copy(NFAFinalState state, int resultID) {
        final NFAFinalState copy = new NFAFinalState((short) stateID.inc(), state.getStateSet(), resultID);
        registerCopy(state, copy);
        return copy;
    }

    private NFAAnchoredFinalState copy(NFAAnchoredFinalState state, int resultID) {
        final NFAAnchoredFinalState copy = new NFAAnchoredFinalState((short) stateID.inc(), state.getStateSet(), resultID);
        registerCopy(state, copy);
        return copy;
    }

    private NFAMatcherState copy(NFAMatcherState s, int resultID) {
        final NFAMatcherState copy = new NFAMatcherState((short) stateID.inc(), s.getStateSet(), s.getMatcherBuilder(), s.getFinishedLookBehinds(), s.hasPrefixStates());
        copy.addPossibleResult(resultID);
        registerCopy(s, copy);
        return copy;
    }

    private void registerCopy(NFAState original, NFAState copy) {
        if (duplicatedStatesMap[original.getId()] == null) {
            duplicatedStatesMap[original.getId()] = new ArrayList<>();
        }
        duplicatedStatesMap[original.getId()].add(copy);
        states.add(copy);
        assert states.get(copy.getId()) == copy;
    }
}
